import os

import pytesseract
from pdf2docx import Converter
from pdf2image import convert_from_path

class FormatConverter:
    """Collection of document format conversion helpers.

    Every method is a classmethod, so no instance is required; the class
    only groups the conversions under one name.
    """

    @classmethod
    def pdf_to_docx(cls, input_path, output_path):
        """Convert the PDF at *input_path* into a DOCX file at *output_path*.

        Args:
            input_path: Path of the source PDF, passed to ``Converter``.
            output_path: Path of the DOCX file to write.
        """
        cv = Converter(input_path)
        try:
            cv.convert(output_path)
        finally:
            # Always release the converter, even when the conversion fails,
            # so the source PDF is not left open.
            cv.close()

    @classmethod
    def pdf_to_images(cls, input_path, output_dir):
        """Save every page of a PDF as ``page_<n>.png`` inside *output_dir*.

        Pages are numbered starting at 1, in the order returned by
        ``convert_from_path``. *output_dir* is created if it is missing.

        Args:
            input_path: Path of the source PDF.
            output_dir: Directory that receives the PNG files.
        """
        images = convert_from_path(input_path)
        # Saving into a missing directory would fail on the first page, so
        # make sure the destination exists before writing anything.
        os.makedirs(output_dir, exist_ok=True)
        for page_number, img in enumerate(images, start=1):
            img.save(os.path.join(output_dir, f"page_{page_number}.png"))

    @classmethod
    def image_to_text(cls, image_path):
        """Run OCR on the image file at *image_path* and return its text.

        Args:
            image_path: Path (``str`` or ``os.PathLike``) of the image file.

        Returns:
            Whatever ``pytesseract.image_to_string`` returns for the file.
        """
        # pytesseract accepts a file path directly, so the image does not have
        # to be opened here (and no extra image-loading import is required).
        # os.fspath() turns pathlib.Path objects into the plain string form
        # that pytesseract recognises as a path.
        return pytesseract.image_to_string(os.fspath(image_path))